# -*- coding: utf-8 -*-
"""
Created on Mon Nov 16 09:00:39 2020

@author: Alain ETIENNE, Arts et Métiers
"""

import random as rnd
import matplotlib.pyplot as plt
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.utils import plot_model

def generate_classification_case1(nbpoints, plot = True):
    '''
    Build a random 2-D training set for a linearly separable two-class problem.

    Each coordinate is drawn uniformly in [-1, 1). A point is labelled 1.0
    when x + y > 0 and 0.0 otherwise.

    Parameters
    ----------
    nbpoints : integer
        Number of points to generate.
    plot : boolean, optional
        When True, scatter the generated points in a figure named
        "Input population - Linear separator" (class 1.0 in green,
        class 0.0 in red). The default is True.

    Returns
    -------
    tuple of numpy arrays
        The input points (one [x, y] row per point) and the matching labels
        (0.0 or 1.0), in the same order.

    '''
    # All abscissas are drawn before any ordinate, so the sequence of random
    # draws is the same whatever the labelling code does afterwards.
    abscissas = [rnd.random()*2 - 1.0 for _ in range(nbpoints)]
    ordinates = [rnd.random()*2 - 1.0 for _ in range(nbpoints)]

    samples = [[px, py] for px, py in zip(abscissas, ordinates)]
    # Labels are plain 0.0 / 1.0 floats to ease the computation of the prediction error.
    labels = [1.0 if px + py > 0 else 0.0 for px, py in samples]

    if plot:
        # Split the population per class only when it actually has to be drawn.
        positives = [pt for pt, tag in zip(samples, labels) if tag == 1.0]
        negatives = [pt for pt, tag in zip(samples, labels) if tag == 0.0]

        plt.figure("Input population - Linear separator")
        plt.scatter([pt[0] for pt in positives], [pt[1] for pt in positives], c="green")
        plt.scatter([pt[0] for pt in negatives], [pt[1] for pt in negatives], c="red")
        plt.plot()

    return np.array(samples), np.array(labels)

def generate_classification_case2(nbpoints, plot = True):
    '''
    Build a random 2-D training set for a two-class problem with a circular (non linear) separator.

    Each coordinate is drawn uniformly in [-1, 1). A point is labelled 1.0
    when x**2 + y**2 > 2/pi (outside the circle) and 0.0 otherwise.

    Parameters
    ----------
    nbpoints : integer
        Number of points to generate.
    plot : boolean, optional
        When True, scatter the generated points in a figure named
        "Input population - Non linear separator" (class 1.0 in green,
        class 0.0 in red). The default is True.

    Returns
    -------
    tuple of numpy arrays
        The input points (one [x, y] row per point) and the matching labels
        (0.0 or 1.0), in the same order.

    '''
    from math import pi

    # Squared radius of the circle separating the two classes.
    squared_radius = 2/(pi)

    # All abscissas are drawn before any ordinate, so the sequence of random
    # draws is the same whatever the labelling code does afterwards.
    abscissas = [rnd.random()*2 - 1.0 for _ in range(nbpoints)]
    ordinates = [rnd.random()*2 - 1.0 for _ in range(nbpoints)]

    samples = [[px, py] for px, py in zip(abscissas, ordinates)]
    # Labels are plain 0.0 / 1.0 floats to ease the computation of the prediction error.
    labels = [1.0 if px**2 + py**2 > squared_radius else 0.0 for px, py in samples]

    if plot:
        # Split the population per class only when it actually has to be drawn.
        outside = [pt for pt, tag in zip(samples, labels) if tag == 1.0]
        inside = [pt for pt, tag in zip(samples, labels) if tag == 0.0]

        plt.figure("Input population - Non linear separator")
        plt.scatter([pt[0] for pt in outside], [pt[1] for pt in outside], c="green")
        plt.scatter([pt[0] for pt in inside], [pt[1] for pt in inside], c="red")
        plt.plot()

    return np.array(samples), np.array(labels)

def generate_regression_case(nbpoints, noise = 10, plot = True):
    '''
    Generation of a noisy set of points following the function y = 4x^3 - 10x - 3.

    Abscissas are drawn uniformly in [-3, 3). The noise added to each
    ordinate is uniform in [0, noise/10): it is one-sided (never negative
    for a positive `noise`), not centred on the curve.

    Parameters
    ----------
    nbpoints : integer
        Number of points generated by the function.
    noise : float, optional
        Amount of noise, in percent, scaling the random term added to each
        ordinate. The default is 10.
    plot : boolean, optional
        When True, scatter the generated points in a dedicated figure and
        display it with plt.show(). The default is True.

    Returns
    -------
    tuple of numpy arrays
        The abscissas and the matching noisy ordinates, in the same order.

    '''

    def noisy_cubic(a):
        # One fresh random draw per point; the cubic itself is deterministic.
        return 4*a**3 -10*a - 3 + rnd.random()*10*(noise/100)

    # All abscissas are drawn first; the noise draws happen afterwards, point by point.
    X = [rnd.random()*6 - 3.0 for _ in range(nbpoints)]
    Y = [noisy_cubic(x) for x in X]

    if plot:
        # Open a dedicated figure, as the classification generators do, so the
        # points are not drawn on top of a previously opened figure.
        plt.figure("Input population - Regression")
        plt.title("Input population - Regression")
        plt.scatter(X,Y)
        plt.show()

    return np.array(X), np.array(Y)

def load_data_exercice_CamexIA(type_of_problem, KC_to_analyse):
    '''
    Load a dataset from a CSV file, normalize its inputs and split it into a training set and a testing set.

    The file read is "factory_process_<type_of_problem>_<KC_to_analyse>.csv"
    (semicolon separated, first row used as header), looked up relative to
    the current working directory. The last column is taken as the output,
    all the other columns as inputs.

    Parameters
    ----------
    type_of_problem : string
        the type of problem to solve ("CLA" for classification and "ACP" for regression).
    KC_to_analyse : string
        Key Characteristic to analyse ("KC4" or "KC7").

    Returns
    -------
    inputs_train : numpy array
        input dataset to use for the training of the network.
    outputs_train : numpy array
        output dataset to use for the training of the network.
    inputs_test : numpy array
        input dataset to use for testing the network.
    outputs_test : numpy array
        output dataset to use for testing the network.
    heads : numpy array
        column labels of the CSV file (all columns, the output column included).

    When one of the two parameters is not among the accepted values, a
    message is printed and None is returned instead of the 5 values above.
    '''

    # Both parameters must be valid: abort as soon as either one is not.
    if type_of_problem not in ("CLA", "ACP") or KC_to_analyse not in ("KC4", "KC7"):
        print("The input parameters are not well set. Aborted")
        return None

    # Heavy dependencies are imported only once the parameters are known to be valid.
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    # Data loading from CSV file
    data = pd.read_csv("factory_process_{}_{}.csv".format(type_of_problem, KC_to_analyse), sep = ";", header = 0)
    heads = data.columns.values
    data = data.to_numpy()

    # Last column is the output to predict, every other column is an input
    outputs, inputs = data[:,-1], data[:,0:-1]

    # Input normalization (avoids some parameters being huge relative to the others).
    # NOTE(review): the scaler is fitted on the whole dataset, before the
    # train/test split, so test samples influence the scaling - kept as is
    # to preserve the returned values; confirm whether this is intended.
    inputs = StandardScaler().fit_transform(inputs)

    # Dataset allocation between training (95 %) and testing (5 %) sets,
    # with a fixed seed so the split is reproducible.
    inputs_train, inputs_test, outputs_train, outputs_test = train_test_split(inputs, outputs, test_size = 0.05, random_state = 42)

    return inputs_train, outputs_train, inputs_test, outputs_test, heads


### Template to follow for each exercise

# Generation or loading of the dataset (to be changed according to the problem to solve)
inputpoints, expected_outputs= generate_classification_case1(500, True)

# TODO: Define the network structure suited to the problem and to the data structure

# TODO: Inspect and check the network structure

# TODO: Define the optimization parameters

# TODO: Define and run the training

# TODO: Plot the evolution of the loss function during the optimization (training)

# TODO: Generate a trial/test dataset to see the prediction of the system
